"""
完成练习案例: 单词计数统计
"""

# 1. 构建执行入口文件
from pyspark import SparkContext,SparkConf
import os
# Point PySpark at the Python interpreter to use (PYSPARK_PYTHON).
os.environ["PYSPARK_PYTHON"] = "C:/Users/86131/AppData/Local/Programs/Python/Python39/python.exe"
conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)
try:
    # 2. Read the input text file into an RDD.
    rdd = sc.textFile("C:/Users/86131/Desktop/课程/python资料/资料/第15章资料/资料/hello.txt")
    # 3. Extract every word. str.split() with no argument splits on any run
    #    of whitespace and never yields empty strings, so blank lines and
    #    repeated spaces do not produce a bogus "" word in the counts.
    rdd_word = rdd.flatMap(lambda x: x.split())
    # 4. Turn each word into a (word, 1) pair: the word is the key.
    word_with_one_rdd = rdd_word.map(lambda word: (word, 1))
    # 5. Group by word and sum the ones to get the per-word count.
    result_word = word_with_one_rdd.reduceByKey(lambda x, y: x + y)
    # 6. Collect the (word, count) pairs to the driver and print them.
    print(result_word.collect())
finally:
    # 7. Always stop the SparkContext, even if a step above raised.
    sc.stop()

